"""
持续性学习数据集生成(1-700)
获取DHF1K中的Daily activity, Animal, Artifact+Landscape 三种类别数据,

"""


import pandas as pd

# Read the Excel file (relative path, resolved against the current working directory).
file_path = 'DHF1k_attribute-all.xlsx'
df = pd.read_excel(file_path)

# If the Excel file contains multiple sheets, use the sheet_name parameter to pick the one to read:
# df = pd.read_excel(file_path, sheet_name='Sheet1')

def get_video_id_by_column_name(name, frame=None):
    """Return the first-column values of the rows flagged with ``name``.

    The label is searched for in the first data row of the table, not in the
    column headers. For the first column whose first-row value equals
    ``name``, the rows where that column equals 1 are selected and the
    values of the table's first column for those rows are returned.

    Args:
        name: Label to look for in the first data row.
        frame: Table to search. Defaults to the module-level ``df``.

    Returns:
        A list of the first-column values of the matching rows, in row order.

    Raises:
        ValueError: If no column has ``name`` in its first data row.
    """
    table = df if frame is None else frame
    for column in table.columns:
        values = table[column]
        # Positional access: the label lives in the first row regardless of
        # how the index is labelled; skip the check on an empty table.
        if len(values) > 0 and values.iloc[0] == name:
            return table.loc[values == 1, table.columns[0]].tolist()
    # Fail loudly here instead of implicitly returning None, which would only
    # surface later as an unrelated TypeError in the caller.
    raise ValueError(f"no column labelled {name!r} found in the first data row")

def add_prefix(nums):
    """Convert each item to a string left-padded with zeros to width 4.

    Items whose string form is already four or more characters long are
    returned unchanged (as strings).
    """
    padded_ids = []
    for raw_id in nums:
        text = str(raw_id)
        padded_ids.append(text.zfill(4))
    return padded_ids

# Build the zero-padded ID list for each category.
daily_activity_ids = get_video_id_by_column_name('Daily activity')
daily_activity_list = add_prefix(daily_activity_ids)

animal_ids = get_video_id_by_column_name('Animal')
animal_list = add_prefix(animal_ids)

# Artifact and Landscape are merged into one list: Artifact IDs first, then Landscape IDs.
artifact_ids = get_video_id_by_column_name('Artifact')
landscape_ids = get_video_id_by_column_name('Landscape')
artifact_landscape_list = add_prefix(artifact_ids + landscape_ids)

print(len(daily_activity_list))

# Check the list sizes against the expected counts before writing anything.
assert len(daily_activity_list) == 127
assert len(animal_list) == 133
assert len(artifact_landscape_list) == 161

# Write one ID per line to each category's output file, in the same order as before.
outputs = (
    ('daily_activity.txt', daily_activity_list),
    ('animal.txt', animal_list),
    ('artifact_landscape.txt', artifact_landscape_list),
)
for out_name, id_list in outputs:
    with open(out_name, 'w') as file:
        file.writelines(num + '\n' for num in id_list)





























